{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/AnamikaMangore/AnamikaMangore/blob/main/Breast_Cancer_Classification_using_Machine_Learning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X3wT8l6lfj--"
      },
      "source": [
        "Importing the Dependencies"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XqsQmOXGXXTe"
      },
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import sklearn.datasets\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.metrics import accuracy_score"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "pwJ9zLukg3Q_"
      },
      "source": [
        "Data Collection & Processing"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "j6bMZMKUgz7L"
      },
      "source": [
        "# loading the data from sklearn\n",
        "breast_cancer_dataset = sklearn.datasets.load_breast_cancer()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xdY6i73KgkDG",
        "outputId": "960e144c-2185-49ef-9a7b-c1085b46fcc8"
      },
      "source": [
        "print(breast_cancer_dataset)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n",
            "        1.189e-01],\n",
            "       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n",
            "        8.902e-02],\n",
            "       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n",
            "        8.758e-02],\n",
            "       ...,\n",
            "       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n",
            "        7.820e-02],\n",
            "       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n",
            "        1.240e-01],\n",
            "       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n",
            "        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n",
            "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n",
            "       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n",
            "       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n",
            "       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n",
            "       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n",
            "       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n",
            "       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n",
            "       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n",
            "       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n",
            "       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n",
            "       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
            "       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n",
            "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n",
            "       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n",
            "       0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n",
            "       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n",
            "       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n",
            "       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n",
            "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n",
            "       1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n",
            "       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n",
            "       1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n",
            "       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n",
            "       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
            "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]), 'frame': None, 'target_names': array(['malignant', 'benign'], dtype='<U9'), 'DESCR': '.. _breast_cancer_dataset:\\n\\nBreast cancer wisconsin (diagnostic) dataset\\n--------------------------------------------\\n\\n**Data Set Characteristics:**\\n\\n    :Number of Instances: 569\\n\\n    :Number of Attributes: 30 numeric, predictive attributes and the class\\n\\n    :Attribute Information:\\n        - radius (mean of distances from center to points on the perimeter)\\n        - texture (standard deviation of gray-scale values)\\n        - perimeter\\n        - area\\n        - smoothness (local variation in radius lengths)\\n        - compactness (perimeter^2 / area - 1.0)\\n        - concavity (severity of concave portions of the contour)\\n        - concave points (number of concave portions of the contour)\\n        - symmetry\\n        - fractal dimension (\"coastline approximation\" - 1)\\n\\n        The mean, standard error, and \"worst\" or largest (mean of the three\\n        worst/largest values) of these features were computed for each image,\\n        resulting in 30 features.  For instance, field 0 is Mean Radius, field\\n        10 is Radius SE, field 20 is Worst Radius.\\n\\n        - class:\\n                - WDBC-Malignant\\n                - WDBC-Benign\\n\\n    :Summary Statistics:\\n\\n    ===================================== ====== ======\\n                                           Min    Max\\n    ===================================== ====== ======\\n    radius (mean):                        6.981  28.11\\n    texture (mean):                       9.71   39.28\\n    perimeter (mean):                     43.79  188.5\\n    area (mean):                          143.5  2501.0\\n    smoothness (mean):                    0.053  0.163\\n    compactness (mean):                   0.019  0.345\\n    concavity (mean):                     0.0    0.427\\n    concave points (mean):                0.0    0.201\\n    symmetry (mean):                      0.106  0.304\\n    fractal dimension (mean):             0.05   0.097\\n    radius (standard error):              0.112  2.873\\n    texture (standard error):             0.36   4.885\\n    perimeter (standard error):           0.757  21.98\\n    area (standard error):                6.802  542.2\\n    smoothness (standard error):          0.002  0.031\\n    compactness (standard error):         0.002  0.135\\n    concavity (standard error):           0.0    0.396\\n    concave points (standard error):      0.0    0.053\\n    symmetry (standard error):            0.008  0.079\\n    fractal dimension (standard error):   0.001  0.03\\n    radius (worst):                       7.93   36.04\\n    texture (worst):                      12.02  49.54\\n    perimeter (worst):                    50.41  251.2\\n    area (worst):                         185.2  4254.0\\n    smoothness (worst):                   0.071  0.223\\n    compactness (worst):                  0.027  1.058\\n    concavity (worst):                    0.0    1.252\\n    concave points (worst):               0.0    0.291\\n    symmetry (worst):                     0.156  0.664\\n    fractal dimension (worst):            0.055  0.208\\n    ===================================== ====== ======\\n\\n    :Missing Attribute Values: None\\n\\n    :Class Distribution: 212 - Malignant, 357 - Benign\\n\\n    :Creator:  Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian\\n\\n    :Donor: Nick Street\\n\\n    :Date: November, 1995\\n\\nThis is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.\\nhttps://goo.gl/U2Uwz2\\n\\nFeatures are computed from a digitized image of a fine needle\\naspirate (FNA) of a breast mass.  They describe\\ncharacteristics of the cell nuclei present in the image.\\n\\nSeparating plane described above was obtained using\\nMultisurface Method-Tree (MSM-T) [K. P. Bennett, \"Decision Tree\\nConstruction Via Linear Programming.\" Proceedings of the 4th\\nMidwest Artificial Intelligence and Cognitive Science Society,\\npp. 97-101, 1992], a classification method which uses linear\\nprogramming to construct a decision tree.  Relevant features\\nwere selected using an exhaustive search in the space of 1-4\\nfeatures and 1-3 separating planes.\\n\\nThe actual linear program used to obtain the separating plane\\nin the 3-dimensional space is that described in:\\n[K. P. Bennett and O. L. Mangasarian: \"Robust Linear\\nProgramming Discrimination of Two Linearly Inseparable Sets\",\\nOptimization Methods and Software 1, 1992, 23-34].\\n\\nThis database is also available through the UW CS ftp server:\\n\\nftp ftp.cs.wisc.edu\\ncd math-prog/cpo-dataset/machine-learn/WDBC/\\n\\n.. topic:: References\\n\\n   - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction \\n     for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on \\n     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,\\n     San Jose, CA, 1993.\\n   - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and \\n     prognosis via linear programming. Operations Research, 43(4), pages 570-577, \\n     July-August 1995.\\n   - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques\\n     to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) \\n     163-171.', 'feature_names': array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',\n",
            "       'mean smoothness', 'mean compactness', 'mean concavity',\n",
            "       'mean concave points', 'mean symmetry', 'mean fractal dimension',\n",
            "       'radius error', 'texture error', 'perimeter error', 'area error',\n",
            "       'smoothness error', 'compactness error', 'concavity error',\n",
            "       'concave points error', 'symmetry error',\n",
            "       'fractal dimension error', 'worst radius', 'worst texture',\n",
            "       'worst perimeter', 'worst area', 'worst smoothness',\n",
            "       'worst compactness', 'worst concavity', 'worst concave points',\n",
            "       'worst symmetry', 'worst fractal dimension'], dtype='<U23'), 'filename': 'breast_cancer.csv', 'data_module': 'sklearn.datasets.data'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yFamhVTThMdK"
      },
      "source": [
        "# loading the data to a data frame\n",
        "data_frame = pd.DataFrame(breast_cancer_dataset.data, columns = breast_cancer_dataset.feature_names)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 290
        },
        "id": "xLiv1maYiGVH",
        "outputId": "c745c324-d92e-4df8-bd04-683463e328c4"
      },
      "source": [
        "# print the first 5 rows of the dataframe\n",
        "data_frame.head()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \\\n",
              "0        17.99         10.38          122.80     1001.0          0.11840   \n",
              "1        20.57         17.77          132.90     1326.0          0.08474   \n",
              "2        19.69         21.25          130.00     1203.0          0.10960   \n",
              "3        11.42         20.38           77.58      386.1          0.14250   \n",
              "4        20.29         14.34          135.10     1297.0          0.10030   \n",
              "\n",
              "   mean compactness  mean concavity  mean concave points  mean symmetry  \\\n",
              "0           0.27760          0.3001              0.14710         0.2419   \n",
              "1           0.07864          0.0869              0.07017         0.1812   \n",
              "2           0.15990          0.1974              0.12790         0.2069   \n",
              "3           0.28390          0.2414              0.10520         0.2597   \n",
              "4           0.13280          0.1980              0.10430         0.1809   \n",
              "\n",
              "   mean fractal dimension  ...  worst radius  worst texture  worst perimeter  \\\n",
              "0                 0.07871  ...         25.38          17.33           184.60   \n",
              "1                 0.05667  ...         24.99          23.41           158.80   \n",
              "2                 0.05999  ...         23.57          25.53           152.50   \n",
              "3                 0.09744  ...         14.91          26.50            98.87   \n",
              "4                 0.05883  ...         22.54          16.67           152.20   \n",
              "\n",
              "   worst area  worst smoothness  worst compactness  worst concavity  \\\n",
              "0      2019.0            0.1622             0.6656           0.7119   \n",
              "1      1956.0            0.1238             0.1866           0.2416   \n",
              "2      1709.0            0.1444             0.4245           0.4504   \n",
              "3       567.7            0.2098             0.8663           0.6869   \n",
              "4      1575.0            0.1374             0.2050           0.4000   \n",
              "\n",
              "   worst concave points  worst symmetry  worst fractal dimension  \n",
              "0                0.2654          0.4601                  0.11890  \n",
              "1                0.1860          0.2750                  0.08902  \n",
              "2                0.2430          0.3613                  0.08758  \n",
              "3                0.2575          0.6638                  0.17300  \n",
              "4                0.1625          0.2364                  0.07678  \n",
              "\n",
              "[5 rows x 30 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-714bf4ea-b1b8-4eaf-816d-98bc22227c28\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>mean radius</th>\n",
              "      <th>mean texture</th>\n",
              "      <th>mean perimeter</th>\n",
              "      <th>mean area</th>\n",
              "      <th>mean smoothness</th>\n",
              "      <th>mean compactness</th>\n",
              "      <th>mean concavity</th>\n",
              "      <th>mean concave points</th>\n",
              "      <th>mean symmetry</th>\n",
              "      <th>mean fractal dimension</th>\n",
              "      <th>...</th>\n",
              "      <th>worst radius</th>\n",
              "      <th>worst texture</th>\n",
              "      <th>worst perimeter</th>\n",
              "      <th>worst area</th>\n",
              "      <th>worst smoothness</th>\n",
              "      <th>worst compactness</th>\n",
              "      <th>worst concavity</th>\n",
              "      <th>worst concave points</th>\n",
              "      <th>worst symmetry</th>\n",
              "      <th>worst fractal dimension</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>17.99</td>\n",
              "      <td>10.38</td>\n",
              "      <td>122.80</td>\n",
              "      <td>1001.0</td>\n",
              "      <td>0.11840</td>\n",
              "      <td>0.27760</td>\n",
              "      <td>0.3001</td>\n",
              "      <td>0.14710</td>\n",
              "      <td>0.2419</td>\n",
              "      <td>0.07871</td>\n",
              "      <td>...</td>\n",
              "      <td>25.38</td>\n",
              "      <td>17.33</td>\n",
              "      <td>184.60</td>\n",
              "      <td>2019.0</td>\n",
              "      <td>0.1622</td>\n",
              "      <td>0.6656</td>\n",
              "      <td>0.7119</td>\n",
              "      <td>0.2654</td>\n",
              "      <td>0.4601</td>\n",
              "      <td>0.11890</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>20.57</td>\n",
              "      <td>17.77</td>\n",
              "      <td>132.90</td>\n",
              "      <td>1326.0</td>\n",
              "      <td>0.08474</td>\n",
              "      <td>0.07864</td>\n",
              "      <td>0.0869</td>\n",
              "      <td>0.07017</td>\n",
              "      <td>0.1812</td>\n",
              "      <td>0.05667</td>\n",
              "      <td>...</td>\n",
              "      <td>24.99</td>\n",
              "      <td>23.41</td>\n",
              "      <td>158.80</td>\n",
              "      <td>1956.0</td>\n",
              "      <td>0.1238</td>\n",
              "      <td>0.1866</td>\n",
              "      <td>0.2416</td>\n",
              "      <td>0.1860</td>\n",
              "      <td>0.2750</td>\n",
              "      <td>0.08902</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>19.69</td>\n",
              "      <td>21.25</td>\n",
              "      <td>130.00</td>\n",
              "      <td>1203.0</td>\n",
              "      <td>0.10960</td>\n",
              "      <td>0.15990</td>\n",
              "      <td>0.1974</td>\n",
              "      <td>0.12790</td>\n",
              "      <td>0.2069</td>\n",
              "      <td>0.05999</td>\n",
              "      <td>...</td>\n",
              "      <td>23.57</td>\n",
              "      <td>25.53</td>\n",
              "      <td>152.50</td>\n",
              "      <td>1709.0</td>\n",
              "      <td>0.1444</td>\n",
              "      <td>0.4245</td>\n",
              "      <td>0.4504</td>\n",
              "      <td>0.2430</td>\n",
              "      <td>0.3613</td>\n",
              "      <td>0.08758</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>11.42</td>\n",
              "      <td>20.38</td>\n",
              "      <td>77.58</td>\n",
              "      <td>386.1</td>\n",
              "      <td>0.14250</td>\n",
              "      <td>0.28390</td>\n",
              "      <td>0.2414</td>\n",
              "      <td>0.10520</td>\n",
              "      <td>0.2597</td>\n",
              "      <td>0.09744</td>\n",
              "      <td>...</td>\n",
              "      <td>14.91</td>\n",
              "      <td>26.50</td>\n",
              "      <td>98.87</td>\n",
              "      <td>567.7</td>\n",
              "      <td>0.2098</td>\n",
              "      <td>0.8663</td>\n",
              "      <td>0.6869</td>\n",
              "      <td>0.2575</td>\n",
              "      <td>0.6638</td>\n",
              "      <td>0.17300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>20.29</td>\n",
              "      <td>14.34</td>\n",
              "      <td>135.10</td>\n",
              "      <td>1297.0</td>\n",
              "      <td>0.10030</td>\n",
              "      <td>0.13280</td>\n",
              "      <td>0.1980</td>\n",
              "      <td>0.10430</td>\n",
              "      <td>0.1809</td>\n",
              "      <td>0.05883</td>\n",
              "      <td>...</td>\n",
              "      <td>22.54</td>\n",
              "      <td>16.67</td>\n",
              "      <td>152.20</td>\n",
              "      <td>1575.0</td>\n",
              "      <td>0.1374</td>\n",
              "      <td>0.2050</td>\n",
              "      <td>0.4000</td>\n",
              "      <td>0.1625</td>\n",
              "      <td>0.2364</td>\n",
              "      <td>0.07678</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 30 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-714bf4ea-b1b8-4eaf-816d-98bc22227c28')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-714bf4ea-b1b8-4eaf-816d-98bc22227c28 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-714bf4ea-b1b8-4eaf-816d-98bc22227c28');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-cfdb62c3-3e20-444d-a53b-28258fe98c0e\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-cfdb62c3-3e20-444d-a53b-28258fe98c0e')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-cfdb62c3-3e20-444d-a53b-28258fe98c0e button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "data_frame"
            }
          },
          "metadata": {},
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UEvD_aTDiNLF"
      },
      "source": [
        "# adding the 'target' column to the data frame\n",
        "data_frame['label'] = breast_cancer_dataset.target"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 290
        },
        "id": "f_kmjEA5io2v",
        "outputId": "b39d783d-09ab-4cdd-aa5a-75467c5f6b90"
      },
      "source": [
        "# print last 5 rows of the dataframe\n",
        "data_frame.tail()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \\\n",
              "564        21.56         22.39          142.00     1479.0          0.11100   \n",
              "565        20.13         28.25          131.20     1261.0          0.09780   \n",
              "566        16.60         28.08          108.30      858.1          0.08455   \n",
              "567        20.60         29.33          140.10     1265.0          0.11780   \n",
              "568         7.76         24.54           47.92      181.0          0.05263   \n",
              "\n",
              "     mean compactness  mean concavity  mean concave points  mean symmetry  \\\n",
              "564           0.11590         0.24390              0.13890         0.1726   \n",
              "565           0.10340         0.14400              0.09791         0.1752   \n",
              "566           0.10230         0.09251              0.05302         0.1590   \n",
              "567           0.27700         0.35140              0.15200         0.2397   \n",
              "568           0.04362         0.00000              0.00000         0.1587   \n",
              "\n",
              "     mean fractal dimension  ...  worst texture  worst perimeter  worst area  \\\n",
              "564                 0.05623  ...          26.40           166.10      2027.0   \n",
              "565                 0.05533  ...          38.25           155.00      1731.0   \n",
              "566                 0.05648  ...          34.12           126.70      1124.0   \n",
              "567                 0.07016  ...          39.42           184.60      1821.0   \n",
              "568                 0.05884  ...          30.37            59.16       268.6   \n",
              "\n",
              "     worst smoothness  worst compactness  worst concavity  \\\n",
              "564           0.14100            0.21130           0.4107   \n",
              "565           0.11660            0.19220           0.3215   \n",
              "566           0.11390            0.30940           0.3403   \n",
              "567           0.16500            0.86810           0.9387   \n",
              "568           0.08996            0.06444           0.0000   \n",
              "\n",
              "     worst concave points  worst symmetry  worst fractal dimension  label  \n",
              "564                0.2216          0.2060                  0.07115      0  \n",
              "565                0.1628          0.2572                  0.06637      0  \n",
              "566                0.1418          0.2218                  0.07820      0  \n",
              "567                0.2650          0.4087                  0.12400      0  \n",
              "568                0.0000          0.2871                  0.07039      1  \n",
              "\n",
              "[5 rows x 31 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-70296931-dbdd-48d9-92d6-365c783259d9\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>mean radius</th>\n",
              "      <th>mean texture</th>\n",
              "      <th>mean perimeter</th>\n",
              "      <th>mean area</th>\n",
              "      <th>mean smoothness</th>\n",
              "      <th>mean compactness</th>\n",
              "      <th>mean concavity</th>\n",
              "      <th>mean concave points</th>\n",
              "      <th>mean symmetry</th>\n",
              "      <th>mean fractal dimension</th>\n",
              "      <th>...</th>\n",
              "      <th>worst texture</th>\n",
              "      <th>worst perimeter</th>\n",
              "      <th>worst area</th>\n",
              "      <th>worst smoothness</th>\n",
              "      <th>worst compactness</th>\n",
              "      <th>worst concavity</th>\n",
              "      <th>worst concave points</th>\n",
              "      <th>worst symmetry</th>\n",
              "      <th>worst fractal dimension</th>\n",
              "      <th>label</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>564</th>\n",
              "      <td>21.56</td>\n",
              "      <td>22.39</td>\n",
              "      <td>142.00</td>\n",
              "      <td>1479.0</td>\n",
              "      <td>0.11100</td>\n",
              "      <td>0.11590</td>\n",
              "      <td>0.24390</td>\n",
              "      <td>0.13890</td>\n",
              "      <td>0.1726</td>\n",
              "      <td>0.05623</td>\n",
              "      <td>...</td>\n",
              "      <td>26.40</td>\n",
              "      <td>166.10</td>\n",
              "      <td>2027.0</td>\n",
              "      <td>0.14100</td>\n",
              "      <td>0.21130</td>\n",
              "      <td>0.4107</td>\n",
              "      <td>0.2216</td>\n",
              "      <td>0.2060</td>\n",
              "      <td>0.07115</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>565</th>\n",
              "      <td>20.13</td>\n",
              "      <td>28.25</td>\n",
              "      <td>131.20</td>\n",
              "      <td>1261.0</td>\n",
              "      <td>0.09780</td>\n",
              "      <td>0.10340</td>\n",
              "      <td>0.14400</td>\n",
              "      <td>0.09791</td>\n",
              "      <td>0.1752</td>\n",
              "      <td>0.05533</td>\n",
              "      <td>...</td>\n",
              "      <td>38.25</td>\n",
              "      <td>155.00</td>\n",
              "      <td>1731.0</td>\n",
              "      <td>0.11660</td>\n",
              "      <td>0.19220</td>\n",
              "      <td>0.3215</td>\n",
              "      <td>0.1628</td>\n",
              "      <td>0.2572</td>\n",
              "      <td>0.06637</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>566</th>\n",
              "      <td>16.60</td>\n",
              "      <td>28.08</td>\n",
              "      <td>108.30</td>\n",
              "      <td>858.1</td>\n",
              "      <td>0.08455</td>\n",
              "      <td>0.10230</td>\n",
              "      <td>0.09251</td>\n",
              "      <td>0.05302</td>\n",
              "      <td>0.1590</td>\n",
              "      <td>0.05648</td>\n",
              "      <td>...</td>\n",
              "      <td>34.12</td>\n",
              "      <td>126.70</td>\n",
              "      <td>1124.0</td>\n",
              "      <td>0.11390</td>\n",
              "      <td>0.30940</td>\n",
              "      <td>0.3403</td>\n",
              "      <td>0.1418</td>\n",
              "      <td>0.2218</td>\n",
              "      <td>0.07820</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>567</th>\n",
              "      <td>20.60</td>\n",
              "      <td>29.33</td>\n",
              "      <td>140.10</td>\n",
              "      <td>1265.0</td>\n",
              "      <td>0.11780</td>\n",
              "      <td>0.27700</td>\n",
              "      <td>0.35140</td>\n",
              "      <td>0.15200</td>\n",
              "      <td>0.2397</td>\n",
              "      <td>0.07016</td>\n",
              "      <td>...</td>\n",
              "      <td>39.42</td>\n",
              "      <td>184.60</td>\n",
              "      <td>1821.0</td>\n",
              "      <td>0.16500</td>\n",
              "      <td>0.86810</td>\n",
              "      <td>0.9387</td>\n",
              "      <td>0.2650</td>\n",
              "      <td>0.4087</td>\n",
              "      <td>0.12400</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>568</th>\n",
              "      <td>7.76</td>\n",
              "      <td>24.54</td>\n",
              "      <td>47.92</td>\n",
              "      <td>181.0</td>\n",
              "      <td>0.05263</td>\n",
              "      <td>0.04362</td>\n",
              "      <td>0.00000</td>\n",
              "      <td>0.00000</td>\n",
              "      <td>0.1587</td>\n",
              "      <td>0.05884</td>\n",
              "      <td>...</td>\n",
              "      <td>30.37</td>\n",
              "      <td>59.16</td>\n",
              "      <td>268.6</td>\n",
              "      <td>0.08996</td>\n",
              "      <td>0.06444</td>\n",
              "      <td>0.0000</td>\n",
              "      <td>0.0000</td>\n",
              "      <td>0.2871</td>\n",
              "      <td>0.07039</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 31 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-70296931-dbdd-48d9-92d6-365c783259d9')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-70296931-dbdd-48d9-92d6-365c783259d9 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-70296931-dbdd-48d9-92d6-365c783259d9');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-6a6a8eca-88b2-4e57-953c-155db3f7f832\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-6a6a8eca-88b2-4e57-953c-155db3f7f832')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-6a6a8eca-88b2-4e57-953c-155db3f7f832 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe"
            }
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sw3wjdK6iwK4",
        "outputId": "bc1ae276-fa2f-497b-ad2e-52f0f580bde4"
      },
      "source": [
        "# number of rows and columns in the dataset\n",
        "data_frame.shape"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(569, 31)"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AWOjMuyBi77M",
        "outputId": "cfb00db2-b545-4964-e2ba-cd1b4d2a545a"
      },
      "source": [
        "# getting some information about the data\n",
        "data_frame.info()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 569 entries, 0 to 568\n",
            "Data columns (total 31 columns):\n",
            " #   Column                   Non-Null Count  Dtype  \n",
            "---  ------                   --------------  -----  \n",
            " 0   mean radius              569 non-null    float64\n",
            " 1   mean texture             569 non-null    float64\n",
            " 2   mean perimeter           569 non-null    float64\n",
            " 3   mean area                569 non-null    float64\n",
            " 4   mean smoothness          569 non-null    float64\n",
            " 5   mean compactness         569 non-null    float64\n",
            " 6   mean concavity           569 non-null    float64\n",
            " 7   mean concave points      569 non-null    float64\n",
            " 8   mean symmetry            569 non-null    float64\n",
            " 9   mean fractal dimension   569 non-null    float64\n",
            " 10  radius error             569 non-null    float64\n",
            " 11  texture error            569 non-null    float64\n",
            " 12  perimeter error          569 non-null    float64\n",
            " 13  area error               569 non-null    float64\n",
            " 14  smoothness error         569 non-null    float64\n",
            " 15  compactness error        569 non-null    float64\n",
            " 16  concavity error          569 non-null    float64\n",
            " 17  concave points error     569 non-null    float64\n",
            " 18  symmetry error           569 non-null    float64\n",
            " 19  fractal dimension error  569 non-null    float64\n",
            " 20  worst radius             569 non-null    float64\n",
            " 21  worst texture            569 non-null    float64\n",
            " 22  worst perimeter          569 non-null    float64\n",
            " 23  worst area               569 non-null    float64\n",
            " 24  worst smoothness         569 non-null    float64\n",
            " 25  worst compactness        569 non-null    float64\n",
            " 26  worst concavity          569 non-null    float64\n",
            " 27  worst concave points     569 non-null    float64\n",
            " 28  worst symmetry           569 non-null    float64\n",
            " 29  worst fractal dimension  569 non-null    float64\n",
            " 30  label                    569 non-null    int64  \n",
            "dtypes: float64(30), int64(1)\n",
            "memory usage: 137.9 KB\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RuoVIUTYjLpk",
        "outputId": "8bd8d9b6-abc9-44cf-8f7a-902374cf7f59"
      },
      "source": [
        "# checking for missing values\n",
        "data_frame.isnull().sum()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "mean radius                0\n",
              "mean texture               0\n",
              "mean perimeter             0\n",
              "mean area                  0\n",
              "mean smoothness            0\n",
              "mean compactness           0\n",
              "mean concavity             0\n",
              "mean concave points        0\n",
              "mean symmetry              0\n",
              "mean fractal dimension     0\n",
              "radius error               0\n",
              "texture error              0\n",
              "perimeter error            0\n",
              "area error                 0\n",
              "smoothness error           0\n",
              "compactness error          0\n",
              "concavity error            0\n",
              "concave points error       0\n",
              "symmetry error             0\n",
              "fractal dimension error    0\n",
              "worst radius               0\n",
              "worst texture              0\n",
              "worst perimeter            0\n",
              "worst area                 0\n",
              "worst smoothness           0\n",
              "worst compactness          0\n",
              "worst concavity            0\n",
              "worst concave points       0\n",
              "worst symmetry             0\n",
              "worst fractal dimension    0\n",
              "label                      0\n",
              "dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 384
        },
        "id": "oLMuXI33jlkq",
        "outputId": "ace14843-0f63-4ccd-fc25-3c32a1dfd025"
      },
      "source": [
        "# statistical measures about the data\n",
        "data_frame.describe()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "       mean radius  mean texture  mean perimeter    mean area  \\\n",
              "count   569.000000    569.000000      569.000000   569.000000   \n",
              "mean     14.127292     19.289649       91.969033   654.889104   \n",
              "std       3.524049      4.301036       24.298981   351.914129   \n",
              "min       6.981000      9.710000       43.790000   143.500000   \n",
              "25%      11.700000     16.170000       75.170000   420.300000   \n",
              "50%      13.370000     18.840000       86.240000   551.100000   \n",
              "75%      15.780000     21.800000      104.100000   782.700000   \n",
              "max      28.110000     39.280000      188.500000  2501.000000   \n",
              "\n",
              "       mean smoothness  mean compactness  mean concavity  mean concave points  \\\n",
              "count       569.000000        569.000000      569.000000           569.000000   \n",
              "mean          0.096360          0.104341        0.088799             0.048919   \n",
              "std           0.014064          0.052813        0.079720             0.038803   \n",
              "min           0.052630          0.019380        0.000000             0.000000   \n",
              "25%           0.086370          0.064920        0.029560             0.020310   \n",
              "50%           0.095870          0.092630        0.061540             0.033500   \n",
              "75%           0.105300          0.130400        0.130700             0.074000   \n",
              "max           0.163400          0.345400        0.426800             0.201200   \n",
              "\n",
              "       mean symmetry  mean fractal dimension  ...  worst texture  \\\n",
              "count     569.000000              569.000000  ...     569.000000   \n",
              "mean        0.181162                0.062798  ...      25.677223   \n",
              "std         0.027414                0.007060  ...       6.146258   \n",
              "min         0.106000                0.049960  ...      12.020000   \n",
              "25%         0.161900                0.057700  ...      21.080000   \n",
              "50%         0.179200                0.061540  ...      25.410000   \n",
              "75%         0.195700                0.066120  ...      29.720000   \n",
              "max         0.304000                0.097440  ...      49.540000   \n",
              "\n",
              "       worst perimeter   worst area  worst smoothness  worst compactness  \\\n",
              "count       569.000000   569.000000        569.000000         569.000000   \n",
              "mean        107.261213   880.583128          0.132369           0.254265   \n",
              "std          33.602542   569.356993          0.022832           0.157336   \n",
              "min          50.410000   185.200000          0.071170           0.027290   \n",
              "25%          84.110000   515.300000          0.116600           0.147200   \n",
              "50%          97.660000   686.500000          0.131300           0.211900   \n",
              "75%         125.400000  1084.000000          0.146000           0.339100   \n",
              "max         251.200000  4254.000000          0.222600           1.058000   \n",
              "\n",
              "       worst concavity  worst concave points  worst symmetry  \\\n",
              "count       569.000000            569.000000      569.000000   \n",
              "mean          0.272188              0.114606        0.290076   \n",
              "std           0.208624              0.065732        0.061867   \n",
              "min           0.000000              0.000000        0.156500   \n",
              "25%           0.114500              0.064930        0.250400   \n",
              "50%           0.226700              0.099930        0.282200   \n",
              "75%           0.382900              0.161400        0.317900   \n",
              "max           1.252000              0.291000        0.663800   \n",
              "\n",
              "       worst fractal dimension       label  \n",
              "count               569.000000  569.000000  \n",
              "mean                  0.083946    0.627417  \n",
              "std                   0.018061    0.483918  \n",
              "min                   0.055040    0.000000  \n",
              "25%                   0.071460    0.000000  \n",
              "50%                   0.080040    1.000000  \n",
              "75%                   0.092080    1.000000  \n",
              "max                   0.207500    1.000000  \n",
              "\n",
              "[8 rows x 31 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-10f5adf9-5825-4f0b-b5a8-1e2ece73220b\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>mean radius</th>\n",
              "      <th>mean texture</th>\n",
              "      <th>mean perimeter</th>\n",
              "      <th>mean area</th>\n",
              "      <th>mean smoothness</th>\n",
              "      <th>mean compactness</th>\n",
              "      <th>mean concavity</th>\n",
              "      <th>mean concave points</th>\n",
              "      <th>mean symmetry</th>\n",
              "      <th>mean fractal dimension</th>\n",
              "      <th>...</th>\n",
              "      <th>worst texture</th>\n",
              "      <th>worst perimeter</th>\n",
              "      <th>worst area</th>\n",
              "      <th>worst smoothness</th>\n",
              "      <th>worst compactness</th>\n",
              "      <th>worst concavity</th>\n",
              "      <th>worst concave points</th>\n",
              "      <th>worst symmetry</th>\n",
              "      <th>worst fractal dimension</th>\n",
              "      <th>label</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>...</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "      <td>569.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>14.127292</td>\n",
              "      <td>19.289649</td>\n",
              "      <td>91.969033</td>\n",
              "      <td>654.889104</td>\n",
              "      <td>0.096360</td>\n",
              "      <td>0.104341</td>\n",
              "      <td>0.088799</td>\n",
              "      <td>0.048919</td>\n",
              "      <td>0.181162</td>\n",
              "      <td>0.062798</td>\n",
              "      <td>...</td>\n",
              "      <td>25.677223</td>\n",
              "      <td>107.261213</td>\n",
              "      <td>880.583128</td>\n",
              "      <td>0.132369</td>\n",
              "      <td>0.254265</td>\n",
              "      <td>0.272188</td>\n",
              "      <td>0.114606</td>\n",
              "      <td>0.290076</td>\n",
              "      <td>0.083946</td>\n",
              "      <td>0.627417</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>3.524049</td>\n",
              "      <td>4.301036</td>\n",
              "      <td>24.298981</td>\n",
              "      <td>351.914129</td>\n",
              "      <td>0.014064</td>\n",
              "      <td>0.052813</td>\n",
              "      <td>0.079720</td>\n",
              "      <td>0.038803</td>\n",
              "      <td>0.027414</td>\n",
              "      <td>0.007060</td>\n",
              "      <td>...</td>\n",
              "      <td>6.146258</td>\n",
              "      <td>33.602542</td>\n",
              "      <td>569.356993</td>\n",
              "      <td>0.022832</td>\n",
              "      <td>0.157336</td>\n",
              "      <td>0.208624</td>\n",
              "      <td>0.065732</td>\n",
              "      <td>0.061867</td>\n",
              "      <td>0.018061</td>\n",
              "      <td>0.483918</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>6.981000</td>\n",
              "      <td>9.710000</td>\n",
              "      <td>43.790000</td>\n",
              "      <td>143.500000</td>\n",
              "      <td>0.052630</td>\n",
              "      <td>0.019380</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.106000</td>\n",
              "      <td>0.049960</td>\n",
              "      <td>...</td>\n",
              "      <td>12.020000</td>\n",
              "      <td>50.410000</td>\n",
              "      <td>185.200000</td>\n",
              "      <td>0.071170</td>\n",
              "      <td>0.027290</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.156500</td>\n",
              "      <td>0.055040</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>11.700000</td>\n",
              "      <td>16.170000</td>\n",
              "      <td>75.170000</td>\n",
              "      <td>420.300000</td>\n",
              "      <td>0.086370</td>\n",
              "      <td>0.064920</td>\n",
              "      <td>0.029560</td>\n",
              "      <td>0.020310</td>\n",
              "      <td>0.161900</td>\n",
              "      <td>0.057700</td>\n",
              "      <td>...</td>\n",
              "      <td>21.080000</td>\n",
              "      <td>84.110000</td>\n",
              "      <td>515.300000</td>\n",
              "      <td>0.116600</td>\n",
              "      <td>0.147200</td>\n",
              "      <td>0.114500</td>\n",
              "      <td>0.064930</td>\n",
              "      <td>0.250400</td>\n",
              "      <td>0.071460</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>13.370000</td>\n",
              "      <td>18.840000</td>\n",
              "      <td>86.240000</td>\n",
              "      <td>551.100000</td>\n",
              "      <td>0.095870</td>\n",
              "      <td>0.092630</td>\n",
              "      <td>0.061540</td>\n",
              "      <td>0.033500</td>\n",
              "      <td>0.179200</td>\n",
              "      <td>0.061540</td>\n",
              "      <td>...</td>\n",
              "      <td>25.410000</td>\n",
              "      <td>97.660000</td>\n",
              "      <td>686.500000</td>\n",
              "      <td>0.131300</td>\n",
              "      <td>0.211900</td>\n",
              "      <td>0.226700</td>\n",
              "      <td>0.099930</td>\n",
              "      <td>0.282200</td>\n",
              "      <td>0.080040</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>15.780000</td>\n",
              "      <td>21.800000</td>\n",
              "      <td>104.100000</td>\n",
              "      <td>782.700000</td>\n",
              "      <td>0.105300</td>\n",
              "      <td>0.130400</td>\n",
              "      <td>0.130700</td>\n",
              "      <td>0.074000</td>\n",
              "      <td>0.195700</td>\n",
              "      <td>0.066120</td>\n",
              "      <td>...</td>\n",
              "      <td>29.720000</td>\n",
              "      <td>125.400000</td>\n",
              "      <td>1084.000000</td>\n",
              "      <td>0.146000</td>\n",
              "      <td>0.339100</td>\n",
              "      <td>0.382900</td>\n",
              "      <td>0.161400</td>\n",
              "      <td>0.317900</td>\n",
              "      <td>0.092080</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>28.110000</td>\n",
              "      <td>39.280000</td>\n",
              "      <td>188.500000</td>\n",
              "      <td>2501.000000</td>\n",
              "      <td>0.163400</td>\n",
              "      <td>0.345400</td>\n",
              "      <td>0.426800</td>\n",
              "      <td>0.201200</td>\n",
              "      <td>0.304000</td>\n",
              "      <td>0.097440</td>\n",
              "      <td>...</td>\n",
              "      <td>49.540000</td>\n",
              "      <td>251.200000</td>\n",
              "      <td>4254.000000</td>\n",
              "      <td>0.222600</td>\n",
              "      <td>1.058000</td>\n",
              "      <td>1.252000</td>\n",
              "      <td>0.291000</td>\n",
              "      <td>0.663800</td>\n",
              "      <td>0.207500</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>8 rows × 31 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-10f5adf9-5825-4f0b-b5a8-1e2ece73220b')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-10f5adf9-5825-4f0b-b5a8-1e2ece73220b button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-10f5adf9-5825-4f0b-b5a8-1e2ece73220b');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-13b82e3c-e47e-44f7-963a-786cb15caf22\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-13b82e3c-e47e-44f7-963a-786cb15caf22')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-13b82e3c-e47e-44f7-963a-786cb15caf22 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe"
            }
          },
          "metadata": {},
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tC8Yii4Yjzer",
        "outputId": "eb25395b-8a49-46fa-f5b5-bfe744d2daec"
      },
      "source": [
        "# checking the distribution of Target Varibale\n",
        "data_frame['label'].value_counts()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "label\n",
              "1    357\n",
              "0    212\n",
              "Name: count, dtype: int64"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HbbkzWeFkjqc"
      },
      "source": [
        "1 --> Benign\n",
        "\n",
        "0 --> Malignant"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 228
        },
        "id": "YGWHjrVSkN5c",
        "outputId": "27f11702-ee47-446f-ea58-6eecd00dbceb"
      },
      "source": [
        "data_frame.groupby('label').mean()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "       mean radius  mean texture  mean perimeter   mean area  mean smoothness  \\\n",
              "label                                                                           \n",
              "0        17.462830     21.604906      115.365377  978.376415         0.102898   \n",
              "1        12.146524     17.914762       78.075406  462.790196         0.092478   \n",
              "\n",
              "       mean compactness  mean concavity  mean concave points  mean symmetry  \\\n",
              "label                                                                         \n",
              "0              0.145188        0.160775             0.087990       0.192909   \n",
              "1              0.080085        0.046058             0.025717       0.174186   \n",
              "\n",
              "       mean fractal dimension  ...  worst radius  worst texture  \\\n",
              "label                          ...                                \n",
              "0                    0.062680  ...     21.134811      29.318208   \n",
              "1                    0.062867  ...     13.379801      23.515070   \n",
              "\n",
              "       worst perimeter   worst area  worst smoothness  worst compactness  \\\n",
              "label                                                                      \n",
              "0           141.370330  1422.286321          0.144845           0.374824   \n",
              "1            87.005938   558.899440          0.124959           0.182673   \n",
              "\n",
              "       worst concavity  worst concave points  worst symmetry  \\\n",
              "label                                                          \n",
              "0             0.450606              0.182237        0.323468   \n",
              "1             0.166238              0.074444        0.270246   \n",
              "\n",
              "       worst fractal dimension  \n",
              "label                           \n",
              "0                     0.091530  \n",
              "1                     0.079442  \n",
              "\n",
              "[2 rows x 30 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-d550fdd7-92d9-49b9-b2e5-711b3fada167\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>mean radius</th>\n",
              "      <th>mean texture</th>\n",
              "      <th>mean perimeter</th>\n",
              "      <th>mean area</th>\n",
              "      <th>mean smoothness</th>\n",
              "      <th>mean compactness</th>\n",
              "      <th>mean concavity</th>\n",
              "      <th>mean concave points</th>\n",
              "      <th>mean symmetry</th>\n",
              "      <th>mean fractal dimension</th>\n",
              "      <th>...</th>\n",
              "      <th>worst radius</th>\n",
              "      <th>worst texture</th>\n",
              "      <th>worst perimeter</th>\n",
              "      <th>worst area</th>\n",
              "      <th>worst smoothness</th>\n",
              "      <th>worst compactness</th>\n",
              "      <th>worst concavity</th>\n",
              "      <th>worst concave points</th>\n",
              "      <th>worst symmetry</th>\n",
              "      <th>worst fractal dimension</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>label</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>17.462830</td>\n",
              "      <td>21.604906</td>\n",
              "      <td>115.365377</td>\n",
              "      <td>978.376415</td>\n",
              "      <td>0.102898</td>\n",
              "      <td>0.145188</td>\n",
              "      <td>0.160775</td>\n",
              "      <td>0.087990</td>\n",
              "      <td>0.192909</td>\n",
              "      <td>0.062680</td>\n",
              "      <td>...</td>\n",
              "      <td>21.134811</td>\n",
              "      <td>29.318208</td>\n",
              "      <td>141.370330</td>\n",
              "      <td>1422.286321</td>\n",
              "      <td>0.144845</td>\n",
              "      <td>0.374824</td>\n",
              "      <td>0.450606</td>\n",
              "      <td>0.182237</td>\n",
              "      <td>0.323468</td>\n",
              "      <td>0.091530</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>12.146524</td>\n",
              "      <td>17.914762</td>\n",
              "      <td>78.075406</td>\n",
              "      <td>462.790196</td>\n",
              "      <td>0.092478</td>\n",
              "      <td>0.080085</td>\n",
              "      <td>0.046058</td>\n",
              "      <td>0.025717</td>\n",
              "      <td>0.174186</td>\n",
              "      <td>0.062867</td>\n",
              "      <td>...</td>\n",
              "      <td>13.379801</td>\n",
              "      <td>23.515070</td>\n",
              "      <td>87.005938</td>\n",
              "      <td>558.899440</td>\n",
              "      <td>0.124959</td>\n",
              "      <td>0.182673</td>\n",
              "      <td>0.166238</td>\n",
              "      <td>0.074444</td>\n",
              "      <td>0.270246</td>\n",
              "      <td>0.079442</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>2 rows × 30 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d550fdd7-92d9-49b9-b2e5-711b3fada167')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-d550fdd7-92d9-49b9-b2e5-711b3fada167 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-d550fdd7-92d9-49b9-b2e5-711b3fada167');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-1ba42d99-8c91-43d8-8d72-7eaa8b6a336f\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1ba42d99-8c91-43d8-8d72-7eaa8b6a336f')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-1ba42d99-8c91-43d8-8d72-7eaa8b6a336f button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe"
            }
          },
          "metadata": {},
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "tUPYps4DlVFR"
      },
      "source": [
        "Separating the features and target"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Z5pD8rP5kzKD"
      },
      "source": [
        "X = data_frame.drop(columns='label', axis=1)\n",
        "Y = data_frame['label']"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Z-GWJHpAlpWJ",
        "outputId": "e636ca34-0083-4b1a-a4d2-4ba37db779e8"
      },
      "source": [
        "print(X)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \\\n",
            "0          17.99         10.38          122.80     1001.0          0.11840   \n",
            "1          20.57         17.77          132.90     1326.0          0.08474   \n",
            "2          19.69         21.25          130.00     1203.0          0.10960   \n",
            "3          11.42         20.38           77.58      386.1          0.14250   \n",
            "4          20.29         14.34          135.10     1297.0          0.10030   \n",
            "..           ...           ...             ...        ...              ...   \n",
            "564        21.56         22.39          142.00     1479.0          0.11100   \n",
            "565        20.13         28.25          131.20     1261.0          0.09780   \n",
            "566        16.60         28.08          108.30      858.1          0.08455   \n",
            "567        20.60         29.33          140.10     1265.0          0.11780   \n",
            "568         7.76         24.54           47.92      181.0          0.05263   \n",
            "\n",
            "     mean compactness  mean concavity  mean concave points  mean symmetry  \\\n",
            "0             0.27760         0.30010              0.14710         0.2419   \n",
            "1             0.07864         0.08690              0.07017         0.1812   \n",
            "2             0.15990         0.19740              0.12790         0.2069   \n",
            "3             0.28390         0.24140              0.10520         0.2597   \n",
            "4             0.13280         0.19800              0.10430         0.1809   \n",
            "..                ...             ...                  ...            ...   \n",
            "564           0.11590         0.24390              0.13890         0.1726   \n",
            "565           0.10340         0.14400              0.09791         0.1752   \n",
            "566           0.10230         0.09251              0.05302         0.1590   \n",
            "567           0.27700         0.35140              0.15200         0.2397   \n",
            "568           0.04362         0.00000              0.00000         0.1587   \n",
            "\n",
            "     mean fractal dimension  ...  worst radius  worst texture  \\\n",
            "0                   0.07871  ...        25.380          17.33   \n",
            "1                   0.05667  ...        24.990          23.41   \n",
            "2                   0.05999  ...        23.570          25.53   \n",
            "3                   0.09744  ...        14.910          26.50   \n",
            "4                   0.05883  ...        22.540          16.67   \n",
            "..                      ...  ...           ...            ...   \n",
            "564                 0.05623  ...        25.450          26.40   \n",
            "565                 0.05533  ...        23.690          38.25   \n",
            "566                 0.05648  ...        18.980          34.12   \n",
            "567                 0.07016  ...        25.740          39.42   \n",
            "568                 0.05884  ...         9.456          30.37   \n",
            "\n",
            "     worst perimeter  worst area  worst smoothness  worst compactness  \\\n",
            "0             184.60      2019.0           0.16220            0.66560   \n",
            "1             158.80      1956.0           0.12380            0.18660   \n",
            "2             152.50      1709.0           0.14440            0.42450   \n",
            "3              98.87       567.7           0.20980            0.86630   \n",
            "4             152.20      1575.0           0.13740            0.20500   \n",
            "..               ...         ...               ...                ...   \n",
            "564           166.10      2027.0           0.14100            0.21130   \n",
            "565           155.00      1731.0           0.11660            0.19220   \n",
            "566           126.70      1124.0           0.11390            0.30940   \n",
            "567           184.60      1821.0           0.16500            0.86810   \n",
            "568            59.16       268.6           0.08996            0.06444   \n",
            "\n",
            "     worst concavity  worst concave points  worst symmetry  \\\n",
            "0             0.7119                0.2654          0.4601   \n",
            "1             0.2416                0.1860          0.2750   \n",
            "2             0.4504                0.2430          0.3613   \n",
            "3             0.6869                0.2575          0.6638   \n",
            "4             0.4000                0.1625          0.2364   \n",
            "..               ...                   ...             ...   \n",
            "564           0.4107                0.2216          0.2060   \n",
            "565           0.3215                0.1628          0.2572   \n",
            "566           0.3403                0.1418          0.2218   \n",
            "567           0.9387                0.2650          0.4087   \n",
            "568           0.0000                0.0000          0.2871   \n",
            "\n",
            "     worst fractal dimension  \n",
            "0                    0.11890  \n",
            "1                    0.08902  \n",
            "2                    0.08758  \n",
            "3                    0.17300  \n",
            "4                    0.07678  \n",
            "..                       ...  \n",
            "564                  0.07115  \n",
            "565                  0.06637  \n",
            "566                  0.07820  \n",
            "567                  0.12400  \n",
            "568                  0.07039  \n",
            "\n",
            "[569 rows x 30 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "r5rs8pColqsn",
        "outputId": "52c8d4d0-0d59-48b8-f5ed-4c1110e9fbb0"
      },
      "source": [
        "print(Y)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0      0\n",
            "1      0\n",
            "2      0\n",
            "3      0\n",
            "4      0\n",
            "      ..\n",
            "564    0\n",
            "565    0\n",
            "566    0\n",
            "567    0\n",
            "568    1\n",
            "Name: label, Length: 569, dtype: int64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B5yk9d-Nl4VV"
      },
      "source": [
        "Splitting the data into training data & Testing data"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vZtU30bPluG_"
      },
      "source": [
        "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zWaeuX3amqYH",
        "outputId": "d11c8d7d-d692-42da-a8ff-e6fa265833b0"
      },
      "source": [
        "print(X.shape, X_train.shape, X_test.shape)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(569, 30) (455, 30) (114, 30)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ttUvP8-im6z3"
      },
      "source": [
        "Model Training"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "OPPOng9Em8eb"
      },
      "source": [
        "Logistic Regression"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xhBPDKZmm0dk"
      },
      "source": [
        "model = LogisticRegression()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 214
        },
        "id": "NkQlXVftnKOm",
        "outputId": "7df222be-ce11-4bfd-d6ed-cb88ef826832"
      },
      "source": [
        "# training the Logistic Regression model using Training data\n",
        "\n",
        "model.fit(X_train, Y_train)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
            "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
            "\n",
            "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
            "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
            "Please also refer to the documentation for alternative solver options:\n",
            "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
            "  n_iter_i = _check_optimize_result(\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LogisticRegression()"
            ],
            "text/html": [
              "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div>"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_Wvez7R5nj5P"
      },
      "source": [
        "Model Evaluation"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Um1zCUo1nmI1"
      },
      "source": [
        "Accuracy Score"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Xy_Ob3Zcnh32"
      },
      "source": [
        "# accuracy on training data\n",
        "X_train_prediction = model.predict(X_train)\n",
        "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fsx-3RlfoMzN",
        "outputId": "a22a96a0-4401-4ae3-e471-b08af249041f"
      },
      "source": [
        "print('Accuracy on training data = ', training_data_accuracy)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy on training data =  0.9472527472527472\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1QYDU6YjoTFl"
      },
      "source": [
        "# accuracy on test data\n",
        "X_test_prediction = model.predict(X_test)\n",
        "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7HbERDa8orMw",
        "outputId": "80b2396b-cf7e-4966-f621-fec65bdc360c"
      },
      "source": [
        "print('Accuracy on test data = ', test_data_accuracy)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy on test data =  0.9298245614035088\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "wDV18UAJpB2B"
      },
      "source": [
        "Building a Predictive System"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Rf_x3hPpouQh",
        "outputId": "8600bdc7-b3da-4525-dadb-ed4fcbd195e4"
      },
      "source": [
        "input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)\n",
        "\n",
        "# change the input data to a numpy array\n",
        "input_data_as_numpy_array = np.asarray(input_data)\n",
        "\n",
        "# reshape the numpy array as we are predicting for one datapoint\n",
        "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
        "\n",
        "prediction = model.predict(input_data_reshaped)\n",
        "print(prediction)\n",
        "\n",
        "if (prediction[0] == 0):\n",
        "  print('The Breast cancer is Malignant')\n",
        "\n",
        "else:\n",
        "  print('The Breast Cancer is Benign')\n",
        "\n"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[1]\n",
            "The Breast Cancer is Benign\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tZVUPp5hqJn1"
      },
      "source": [],
      "execution_count": null,
      "outputs": []
    }
  ]
}